Annual Outcome

#Setup

Read in packages and functions

source("https://raw.githubusercontent.com/CSISdefense/R-scripts-and-data/master/helper.r")
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:plyr':
## 
##     here
## The following object is masked from 'package:base':
## 
##     date
source("https://raw.githubusercontent.com/CSISdefense/R-scripts-and-data/master/lookups.r")
source("DIIGstat.r")
## Loading required package: MASS
## Loading required package: Matrix
## Loading required package: lme4
## 
## arm (Version 1.10-1, built: 2018-4-12)
## Working directory is H:/Users/Greg/Repositories/Vendor
## 
## Attaching package: 'arm'
## The following object is masked from 'package:scales':
## 
##     rescale
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked _by_ '.GlobalEnv':
## 
##     Boxplot
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:arm':
## 
##     logit
source("https://raw.githubusercontent.com/CSISdefense/Crisis-Funding/master/ContractCleanup.r")
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked _by_ '.GlobalEnv':
## 
##     subplot
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:plyr':
## 
##     is.discrete, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(csis360)
library(ggplot2)
library(scales)
library(Hmisc)
library(dplyr)
# Coloration<-read.csv(
#     paste(Path,"Lookups\\","lookup_coloration.csv",sep=""),
#     header=TRUE, sep=",", na.strings="", dec=".", strip.white=TRUE, 
#     stringsAsFactors=FALSE
#     )
# 
# Coloration<-ddply(Coloration
#                   , c(.(R), .(G), .(B))
#                   , mutate
#                   , ColorRGB=as.character(
#                       if(min(is.na(c(R,G,B)))) {NA} 
#                       else {rgb(max(R),max(G),max(B),max=255)}
#                       )
#                   )

axis.text.size<-10
strip.text.size<-10
legend.text.size<-8
# table.text.size<-5.75
title.text.size<-12
geom.text.size<-12

main.text.size<-1
note.text.size<-1.40

all_labeled<-function(data){
  subset(data,
                                        !is.na(Dur) & 
               !is.na(Ceil) &
               !is.na(CRai) & 
           !is.na(Term))
}

only_complete<-function(data){
  data<-all_labeled(data)
  subset(data,(LastCurrentCompletionDate<=as.Date("2016-09-30") |
              IsClosed==1) &
           UnmodifiedCurrentCompletionDate<as.Date("2016-09-30"))
}

Contracts are classified using a mix of numerical and categorical variables. While the changes in numerical variables are easy to grasp and summarize, a contract may have one line item that is competed and another that is not. As is detailed in the exploration on R&D, we are only considering information available prior to contract start. The percentage of contract obligations that were competed is a valuable benchmark, but is highly influenced by factors that occured after contract start..

Read in data

load(file="Data/defense_contract_all.RData")
# debug(transform_contract)
def_all<-transform_contract(def_all)


def_all<-FormatContractModel(def_all)
## Warning: Unknown or uninitialised column: 'LowCeil'.

## Warning: Unknown or uninitialised column: 'LowCeil'.
## Warning: Unknown or uninitialised column: 'NChg'.
head(def_all)
## # A tibble: 6 x 39
## # Groups:   Ceil [2]
##   CSIScontractID StartFY Action.Obligation LastCurrentCompletionDate
##            <int>   <int>             <dbl> <date>                   
## 1        3375818    2006            92160  2006-09-30               
## 2        4000840    2006             3097  2006-09-23               
## 3       21538471    2005             3574  2004-11-08               
## 4       10123906    2006         20613770. 2008-12-31               
## 5        5261947    2011             6500  2011-10-29               
## 6       63603967    2016             3470. 2015-12-09               
## # ... with 35 more variables:
## #   UnmodifiedContractBaseAndAllOptionsValue <dbl>, UnmodifiedDays <dbl>,
## #   Dur <ord>, Ceil <ord>, CBre <ord>,
## #   ChangeOrderBaseAndAllOptionsValue <dbl>,
## #   UnmodifiedNumberOfOffersReceived <int>,
## #   UnmodifiedCurrentCompletionDate <date>, IsClosed <fct>, Term <fct>,
## #   SumOfisChangeOrder <int>, b_CBre <dbl>, j_CBre <dbl>, b_Term <dbl>,
## #   j_Term <dbl>, pChangeOrderUnmodifiedBaseAndAll <dbl>,
## #   pChange3Sig <dbl>, CRai <fct>, n_CBre <dbl>, l_CBre <dbl>,
## #   l_Ceil <dbl>, ceil.median.wt <dbl>, Ceil.Simple <ord>, Ceil.Big <ord>,
## #   Ceil.1m <ord>, l_Days <dbl>, UnmodifiedYearsFloat <dbl>,
## #   UnmodifiedYearsCat <dbl>, Dur.Simple <ord>, cl_Ceil <dbl>,
## #   cl_Days <dbl>, TermNum <int>, ObligationWT <dbl>, NChg <fct>,
## #   ContractCount <dbl>
write.csv(subset(def_all,Term=="Terminated"),"Terminated.csv")
def_all<-subset(def_all,  StartFY>=2007 & 
                                               StartFY<=2015 
                )

A Histogram of the IsTerminated data showing the distribution of whether or not a contract was terminated each year from 2007.

# TerminatedDurSummary<-ddply(subset(def_all,StartFY>=2007 & 
#                   !is.na(Ceil)&
#                   UnmodifiedCompletionDate<=as.Date("2015-09-30")&
#                       !is.na(Term)),
#                             .(Ceil,
#                               Dur,
#                               StartFY,
#                               Term
#                             ),
#                             dplyr::summarise,
#                             Action.Obligation=sum(Action.Obligation),
#                             Count=length(CSIScontractID)
#                   )
# 
# 
# TerminatedDurSummary<-ddply(TerminatedDurSummary,.(Ceil,
#                                                   Dur,
#                                              StartFY
#                                              ),transform,
#                       pContractCeilDurStart=Count/sum(Count),
#                       pObligationCeilDurStart=Action.Obligation/sum(Action.Obligation)
#       )
# 
# 
# ggplot(TerminatedDurSummary,
#        aes(x=StartFY,
#            y=Count,
#            color=Term))+geom_line()+    geom_point(aes(shape=metric))+facet_grid(Ceil ~ Dur ) +scale_y_log10(labels=scales::comma)
# 
# 
# 
# 
# 
# 
# ggplot(
#   data = TerminatedEndSummary,
#   aes_string(x = "Term"),
#   ) + geom_bar() + 
#     facet_grid( Ceil ~ .,
#                 scales = "free_y",
#                 space = "free_y") + scale_y_continuous(expand = c(0,50)) 
# 
# 
# 
# 
# 
# ggplot(
#   data = subset(TerminatedEndSummary,Term=="Terminated"),
#   aes_string(x = "Ceil")
#   )+ geom_bar()+
#     scale_x_discrete("Original Ceiling (Current $ Value)")+scale_y_continuous("Number of Partially or Completely \nTerminated Contracts",labels = comma)+theme(axis.text.x=element_text(angle=90,size=12))
# 
# 
# 
# 
# 
# 
# TerminatedEndSummary$Graph[TerminatedEndSummary$Term=="Terminated"]<-TRUE
# 
# TerminatedEndSummary$Graph[TerminatedEndSummary$Term=="Unterminated"]<-FALSE
# 
# 
# head(TerminatedEndSummary)
# 
# ggplot(
#   data = subset(TerminatedEndSummary,Term=="Terminated"),
#   aes(x = Ceil,weight=Action.Obligation/1000000000)
#   )+ geom_bar()+
#     scale_x_discrete("Original Ceiling (Current $ Value)")+scale_y_continuous("Obligations to Partially or Completely\nTerminated Contracts (Current $ Billions)",labels = comma)+theme(axis.text.x=element_text(angle=90,size=12))
# 
# 
# ggplot(
#   data = subset(TerminatedEndSummary,Term=="Terminated"),
#   aes_string(x = "Ceil",weight="pContract")
# #   main="Percentage of Contracts going to Partially or Completely Terminated Contracts\nBy Initial Contract Ceiling"
#   )+ geom_bar()+ scale_y_continuous("Percent of Contracts Partially or Completely Terminated\nby Original Ceiling Category", labels=percent)+
#     scale_x_discrete("Original Ceiling (Current $ Value)")+theme(axis.text.x=element_text(angle=90,size=12))
# 
# 
# ggplot(
#   data = subset(TerminatedEndSummary,Term=="Terminated"),
#   aes_string(x = "Ceil",weight="pObligation"),
#   main="Percentage of Contract Obligations going to Partially or Completely Terminated Contracts\nBy Initial Contract Ceiling"
#   )+ geom_bar()+ scale_y_continuous("Percent of Obligations to Terminated Contracts \nin Original Ceiling Category", labels=percent)+
#     scale_x_discrete("Original Ceiling (Current $ Value)")+theme(axis.text.x=element_text(angle=90,size=12))
# 
# 
# # 
# # LatticePercentLineWrapper("VAR.name"
# #                                     ,"VAR.proper.name"
# #                                     ,"VAR.X.label"
# #                                     ,"VAR.Y.label"
# #                                     ,Coloration
# #                                     ,subset(TerminatedEndSummary,!is.na(Term))
# #                                     ,NULL
# #                                     ,"Ceil"
# #                                     ,"Count"
# #                                     ,"Term"
# #                                     ,NA
# #                                     ,NA
# #                                     )
# 
# # 
# # + 
# #     facet_grid( Ceil ~ .,
# #                 scales = "free_y",
# #                 space = "free_y") 
# # 

head(def_all)
## # A tibble: 6 x 39
## # Groups:   Ceil [2]
##   CSIScontractID StartFY Action.Obligation LastCurrentCompletionDate
##            <int>   <int>             <dbl> <date>                   
## 1        5261947    2011             6500  2011-10-29               
## 2       22544223    2009             7687  2009-07-20               
## 3        9334467    2010            22000  2010-11-20               
## 4       61736309    2015              779. 2014-12-18               
## 5       22071327    2009             4406  2010-08-16               
## 6       62898001    2015              248. 2015-02-13               
## # ... with 35 more variables:
## #   UnmodifiedContractBaseAndAllOptionsValue <dbl>, UnmodifiedDays <dbl>,
## #   Dur <ord>, Ceil <ord>, CBre <ord>,
## #   ChangeOrderBaseAndAllOptionsValue <dbl>,
## #   UnmodifiedNumberOfOffersReceived <int>,
## #   UnmodifiedCurrentCompletionDate <date>, IsClosed <fct>, Term <fct>,
## #   SumOfisChangeOrder <int>, b_CBre <dbl>, j_CBre <dbl>, b_Term <dbl>,
## #   j_Term <dbl>, pChangeOrderUnmodifiedBaseAndAll <dbl>,
## #   pChange3Sig <dbl>, CRai <fct>, n_CBre <dbl>, l_CBre <dbl>,
## #   l_Ceil <dbl>, ceil.median.wt <dbl>, Ceil.Simple <ord>, Ceil.Big <ord>,
## #   Ceil.1m <ord>, l_Days <dbl>, UnmodifiedYearsFloat <dbl>,
## #   UnmodifiedYearsCat <dbl>, Dur.Simple <ord>, cl_Ceil <dbl>,
## #   cl_Days <dbl>, TermNum <int>, ObligationWT <dbl>, NChg <fct>,
## #   ContractCount <dbl>
# 
# ggplot(TerminatedEndSummary,
#        aes(x=StartFY,
#            y=Count,
#            color=Term))+geom_line()+    geom_point(aes(shape=metric))+facet_grid(Ceil ~ EndAfterPeriod ) +scale_y_log10()
# 
# TerminatedUnmodifiedYearsCatStat<-rbind(ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     !is.na(UnmodifiedYearsCat) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(UnmodifiedYearsCat,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = mean(TermNum),
#                          sd   = NA ,# sd(TermNum),
#                          se   = NA, #sd / sqrt(Count),
#                          metric="Unweighted"
#                           # ceil.mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          # ceil.cat.mean = wtd.mean(TermNum,ceil.median.wt)
# ))
# 
# 
# 
# TerminatedUnmodifiedYearsCatStat<-rbind(TerminatedUnmodifiedYearsCatStat,
#                          ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(UnmodifiedYearsCat,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = wtd.mean(TermNum,ObligationWT),
#                          sd   = NA ,# sd(TermNum),
#                          se   = NA, #sd / sqrt(Count),
#                          metric="Obligation Weighted"
#                           # ceil.mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          # ceil.cat.mean = wtd.mean(TermNum,ceil.median.wt)
# 
# ))
# 
# TerminatedUnmodifiedYearsCatStat<-rbind(TerminatedUnmodifiedYearsCatStat,
#                          ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(UnmodifiedYearsCat,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          sd   = NA ,# sd(TermNum),
#                          se   = NA, #sd / sqrt(Count),
#                          metric="Ceiling Weighted"
#                          # obl.mean = ,
#                          # ceil.mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          # ceil.cat.mean = wtd.mean(TermNum,ceil.median.wt)
# 
# ))
# 
# 
# TerminatedUnmodifiedYearsCatStat<-rbind(TerminatedUnmodifiedYearsCatStat,
#                          ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(UnmodifiedYearsCat,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = wtd.mean(TermNum,ceil.median.wt),
#                          sd   = NA ,# sd(TermNum),
#                          se   = NA, #sd / sqrt(Count),
#                          metric="Ceiling Category Weighted"
# 
# ))
# 
# 
# # 
# # pd <- position_dodge(0.1) # move them .05 to the left and right
# # 
# # ggplot(tgc, aes(x=dose, y=len, colour=supp)) + 
# #     geom_errorbar(aes(ymin=len-se, ymax=len+se), width=.1, position=pd) 
# 
# ggplot(TerminatedUnmodifiedYearsCatStat,aes(x=StartFY,y=mean,color=metric))+
#     geom_line()+
#         geom_point(aes(shape=metric))+
#     facet_grid(.~ UnmodifiedYearsCat ) +
#     scale_x_continuous("Contract Starting Fiscal Year")+
#     scale_y_continuous(label=percent)+
#     geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1)+
#     theme(legend.position="bottom") #, position=pd
# 
# TerminatedDurStat<-rbind(                         ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     StartFY<=2014 & 
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(Dur,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = mean(TermNum),
#                          sd   = sd(TermNum),
#                          se   = sd / sqrt(Count),
#                          metric="Unweighted"
#                           # ceil.mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          # ceil.cat.mean = wtd.mean(TermNum,ceil.median.wt)
# ))
# 
# 
# TerminatedDurStat<-rbind(TerminatedDurStat,
#                          ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     StartFY<=2014 & 
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(Dur,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = wtd.mean(TermNum,ObligationWT),
#                          sd   = sqrt(wtd.var(TermNum,ObligationWT)) ,
#                          se   = sd / sqrt(Count),
#                          metric="Obligation Weighted"
#                           # ceil.mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          # ceil.cat.mean = wtd.mean(TermNum,ceil.median.wt)
# ))
# 
# TerminatedDurStat<-rbind(TerminatedDurStat,
#                          ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     StartFY<=2014 & 
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(Dur,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          sd   = sqrt(wtd.var(TermNum,UnmodifiedContractBaseAndAllOptionsValue)) ,
#                          se   = sd / sqrt(Count),
#                          metric="Ceiling Weighted"
#                          # obl.mean = ,
#                          # ceil.mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          # ceil.cat.mean = wtd.mean(TermNum,ceil.median.wt)
# 
# ))
# 
# 
# TerminatedDurStat<-rbind(TerminatedDurStat,
#                          ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     StartFY<=2014 & 
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(Dur,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = wtd.mean(TermNum,ceil.median.wt),
#                          sd   = sqrt(wtd.var(TermNum,ceil.median.wt)) ,
#                          se   = sd / sqrt(Count),
#                          metric="Ceiling Category Weighted"
# 
# ))
# 
# 
# # 
# # pd <- position_dodge(0.1) # move them .05 to the left and right
# # 
# # ggplot(tgc, aes(x=dose, y=len, colour=supp)) + 
# #     geom_errorbar(aes(ymin=len-se, ymax=len+se), width=.1, position=pd) 
# 
# ggplot(subset(TerminatedDurStat,!metric %in% c("Ceiling Weighted")),
#        aes(x=StartFY,y=mean,color=metric))+
#     geom_line()+
#         geom_point(aes(shape=metric))+
#     facet_grid( Dur  ~.) +
#     scale_x_continuous("Contract Starting Fiscal Year")+
#     scale_y_continuous(label=percent)+
#     geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1)+
#     theme(legend.position="bottom") #, position=pd
# 
# ggplot(subset(TerminatedDurStat,!metric %in% c("Ceiling Weighted","Ceiling Category Weighted")),
#        aes(x=StartFY,y=mean,color=metric))+
#     geom_line()+
#         geom_point(aes(shape=metric))+
#     facet_grid( Dur  ~., space = "free_y", scales="free_y") +
#     scale_x_continuous("Contract Starting Fiscal Year")+
#     scale_y_continuous(label=percent)+
#     geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1)+
#     theme(legend.position="bottom") #, position=pd
# 
# 
# TerminatedDur.SimpleStatCount<-ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     StartFY<=2014 & 
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(Dur.Simple,
#                            StartFY,
#                            Term
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID)
# )
# 
# ggplot(TerminatedDur.SimpleStatCount,
#        aes(x=StartFY,y=Count,color=Term))+
#     geom_line()+
#     geom_point(aes(shape=Term))+
#     facet_grid( Dur.Simple  ~.) +
#     scale_x_continuous("Contract Starting Fiscal Year")+
#     scale_y_log10("Number of Contracts",label=comma)
#     geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1)+
#     # theme(legend.position="bottom") #, position=pd
# 
# ggplot(TerminatedDur.SimpleStatCount,
#        aes(x=StartFY,y=Count,color=Term))+
#     geom_line()+
#         geom_point(aes(shape=Term))+
#     facet_grid( Dur.Simple  ~., ) +#
#     scale_x_continuous("Contract Starting Fiscal Year")+
#         
#     scale_y_log10("Number of Contracts (Variable Scale)",label=comma)
#     # geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1)+
#     theme(legend.position="bottom") #, position=pd
#     
# ddply(TerminatedDurStat,
#       .(Dur),
#       dplyr::summarise,
#       Count=sum(Count),
#       Action.Obligation=sum(Action.Obligation))
# 
# 
# 
# TerminatedDur.SimpleStat<-rbind(                         ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     StartFY<=2014 & 
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(Dur.Simple,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = mean(TermNum),
#                          sd   = sd(TermNum),
#                          se   = sd / sqrt(Count),
#                          metric="Unweighted"
#                           # ceil.mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          # ceil.cat.mean = wtd.mean(TermNum,ceil.median.wt)
# ))
# 
# 
# TerminatedDur.SimpleStat<-rbind(TerminatedDur.SimpleStat,
#                          ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     StartFY<=2014 & 
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(Dur.Simple,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = wtd.mean(TermNum,ObligationWT),
#                          sd   = sqrt(wtd.var(TermNum,ObligationWT)) ,
#                          se   = sd / sqrt(Count),
#                          metric="Obligation Weighted"
#                           # ceil.mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          # ceil.cat.mean = wtd.mean(TermNum,ceil.median.wt)
# ))
# 
# TerminatedDur.SimpleStat<-rbind(TerminatedDur.SimpleStat,
#                          ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     StartFY<=2014 & 
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(Dur.Simple,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          sd   = sqrt(wtd.var(TermNum,UnmodifiedContractBaseAndAllOptionsValue)) ,
#                          se   = sd / sqrt(Count),
#                          metric="Ceiling Weighted"
#                          # obl.mean = ,
#                          # ceil.mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          # ceil.cat.mean = wtd.mean(TermNum,ceil.median.wt)
# 
# ))
# 
# 
# TerminatedDur.SimpleStat<-rbind(TerminatedDur.SimpleStat,
#                          ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     StartFY<=2014 & 
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(Dur.Simple,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = wtd.mean(TermNum,ceil.median.wt),
#                          sd   = sqrt(wtd.var(TermNum,ceil.median.wt)) ,
#                          se   = sd / sqrt(Count),
#                          metric="Ceiling Category Weighted"
# 
# ))
# 
# 
# # 
# # pd <- position_dodge(0.1) # move them .05 to the left and right
# # 
# # ggplot(tgc, aes(x=dose, y=len, colour=supp)) + 
# #     geom_errorbar(aes(ymin=len-se, ymax=len+se), width=.1, position=pd) 
# 
# ggplot(subset(TerminatedDur.SimpleStat,!metric %in% c("Ceiling Weighted","Ceiling Category Weighted")),
#        aes(x=StartFY,y=mean,color=metric))+
#     geom_line()+
#     geom_point(aes(shape=metric))+
#     facet_grid( Dur.Simple  ~.) +
#     scale_x_continuous("Contract Starting Fiscal Year")+
#     scale_y_continuous(label=percent)+
#     geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1)+
#     theme(legend.position="bottom") #, position=pd
# 
# ggplot(subset(TerminatedDur.SimpleStat,!metric %in% c("Ceiling Weighted","Ceiling Category Weighted")),
#        aes(x=StartFY,y=mean,color=metric))+
#     geom_line()+
#         geom_point(aes(shape=metric))+
#     facet_grid( Dur.Simple  ~., space = "free_y", scales="free_y") +
#     scale_x_continuous("Contract Starting Fiscal Year")+
#     scale_y_continuous("Percent Terminated",label=percent)+
#     geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1)+
#     theme(legend.position="bottom")+ #, position=pd
# TerminatedSDurSCeilStatCount<-ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     StartFY<=2014 & 
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(Dur.Simple,
#                            Ceil,
#                            StartFY,
#                            Term
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID)
# )
# 
# 
# 
# ggplot(TerminatedSDurSCeilStatCount,
#        aes(x=StartFY,y=Count,color=Term))+
#     geom_line()+
#     geom_point(aes(shape=Term))+
#     facet_grid( Dur.Simple  ~ Ceil) +
#     scale_x_continuous("Contract Starting Fiscal Year")+
#     scale_y_log10("Number of Contracts",label=comma)
#     # theme(legend.position="bottom") #, position=pd
# 
# ggplot(TerminatedSDurSCeilStatCount,
#        aes(x=StartFY,y=Count,color=Term))+
#     geom_line()+
#         geom_point(aes(shape=Term))+
#     facet_grid( Dur.Simple  ~ Ceil ) +#
#     scale_x_continuous("Contract Starting Fiscal Year")+
#         
#     scale_y_log10("Number of Contracts (Variable Scale)",label=comma)
#     # geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1)+
#     theme(legend.position="bottom") #, position=pd
#     

Terminations

Contract terminations and the number of change orders can be calculated for the entire sample. Contract termination is determined using the Reason for Modification field in FPDS. A contract is considered to be terminated if it has at least one modification with the following values:

These four catetegories and the “Close Out” category are used to mark a contract as closed. Many contracts in FPDS and in the sample are never marked closed.

Termination Timeline

TerminatedSDurSCeilStatCount<-
  only_complete(def_all) %>%
  group_by(Dur.Simple,
      Ceil.Simple,
      StartFY,
      Term
    ) %>%
    dplyr::summarise(
    Action.Obligation=sum(Action.Obligation),
    Count=length(CSIScontractID),
    metric="Contracts within Period"
)

TerminatedSDurSCeilStatCount<-rbind(TerminatedSDurSCeilStatCount,
                                    all_labeled(def_all) %>%                
                                    group_by(Dur.Simple,
                                      Ceil.Simple,
                                      StartFY,
                                      Term
                                    ) %>%
                                    dplyr::summarise(
                                    Action.Obligation=sum(Action.Obligation),
                                    Count=length(CSIScontractID),
                                    metric="Early Results for All Contracts"
))

TerminatedSDurSCeilStatCount$metric<-factor(TerminatedSDurSCeilStatCount$metric,
                                            levels=c("Contracts within Period",
                                                   "Early Results for All Contracts"),
                                            ordered=TRUE)

TerminatedSDurSCeilStatCount$Term<-factor(TerminatedSDurSCeilStatCount$Term,
                                          levels=c("Unterminated",
                                                   "Terminated"),
                                          labels=c("Unterminated",
                                                   "Complete or Partial Termination"),
                                            ordered=TRUE)


TerminatedSDurSCeilLabels<-
    subset(TerminatedSDurSCeilStatCount,metric=="Contracts within Period") %>%
    group_by(Dur.Simple,Ceil.Simple) %>%
    dplyr::summarise(
    FacetCount=paste("Count:",prettyNum(sum(Count),big.mark=",")),
    FacetValue=paste(FacetCount,"\nObligated: $",round(sum(Action.Obligation)/1000000000,1),"B",sep="")
    )

Ypos<-max(TerminatedSDurSCeilStatCount$Count)


ggplot(TerminatedSDurSCeilStatCount,
       aes(x=StartFY,y=Count,color=Term))+
    geom_line(aes(linetype=metric))+
    geom_point(aes(shape=Term))+
    geom_text(data=TerminatedSDurSCeilLabels,
              aes(x=2007,y=Ypos,label=FacetValue),
              # parse=TRUE,
              hjust=0,
              vjust=1,
              color="black")+
    facet_grid( Dur.Simple  ~ Ceil.Simple ) +#
    scale_x_continuous("Contract Starting Fiscal Year")+
    scale_color_manual("Status", values=c("blue","red"))+
    scale_linetype_discrete("Early Results")+
    scale_shape_discrete("Status")+
    scale_y_log10("Number of Contracts (Logorithmic Scale)",label=scales::comma)+
    # geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1)+
    theme(legend.position="bottom") #, position=pd

summary(def_all$StartFY
        )
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2007    2009    2011    2011    2014    2015
ggplot(def_all,aes(x=l_Days))+geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 174983 rows containing non-finite values (stat_bin).

ggplot(subset(def_all,UnmodifiedDays<1),aes(x=UnmodifiedDays))+geom_histogram()

# summary(def_all$Dur)
# 
# TerminatedDur.SimpleIntlStat<-rbind(                         ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     StartFY<=2014 & (LastCurrentCompletionDate<=strptime("2015-09-30","%Y-%m-%d") | IsClosed==1) &
#                                     !is.na(Ceil) &
#                                     !is.na(Intl) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(Dur.Simple,
#                            StartFY,
#                            Intl
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = mean(TermNum),
#                          sd   = sd(TermNum),
#                          se   = sd / sqrt(Count),
#                          metric="Unweighted"
#                           # ceil.mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          # ceil.cat.mean = wtd.mean(TermNum,ceil.median.wt)
# ))
# 
# 
# 
# TerminatedDur.SimpleIntlStat<-rbind(TerminatedDur.SimpleIntlStat,
#                          ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     StartFY<=2014 & (LastCurrentCompletionDate<=strptime("2015-09-30","%Y-%m-%d") | IsClosed==1) &
#                                     !is.na(Ceil) &
#                                     !is.na(Intl) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(Dur.Simple,
#                            StartFY,
#                            Intl
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = wtd.mean(TermNum,ObligationWT),
#                          sd   = sqrt(wtd.var(TermNum,ObligationWT)) ,
#                          se   = sd / sqrt(Count),
#                          metric="Obligation Weighted"
#                           # ceil.mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          # ceil.cat.mean = wtd.mean(TermNum,ceil.median.wt)
# ))
# 
# TerminatedDur.SimpleIntlStat<-rbind(TerminatedDur.SimpleIntlStat,
#                          ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     StartFY<=2014 & (LastCurrentCompletionDate<=strptime("2015-09-30","%Y-%m-%d") | IsClosed==1) &
#                                     !is.na(Ceil) &
#                                     !is.na(Intl) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(Dur.Simple,
#                            StartFY,
#                            Intl
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          sd   = sqrt(wtd.var(TermNum,UnmodifiedContractBaseAndAllOptionsValue)) ,
#                          se   = sd / sqrt(Count),
#                          metric="Ceiling Weighted"
#                          # obl.mean = ,
#                          # ceil.mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          # ceil.cat.mean = wtd.mean(TermNum,ceil.median.wt)
# 
# ))
# 
# 
# TerminatedDur.SimpleIntlStat<-rbind(TerminatedDur.SimpleIntlStat,
#                          ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     StartFY<=2014 & (LastCurrentCompletionDate<=strptime("2015-09-30","%Y-%m-%d") | IsClosed==1) &
#                                     !is.na(Ceil) &
#                                     !is.na(Intl) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(Dur.Simple,
#                            StartFY,
#                            Intl
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = wtd.mean(TermNum,ceil.median.wt),
#                          sd   = sqrt(wtd.var(TermNum,ceil.median.wt)) ,
#                          se   = sd / sqrt(Count),
#                          metric="Ceiling Category Weighted"
# 
# ))
# 
# 
# # 
# # pd <- position_dodge(0.1) # move them .05 to the left and right
# # 
# # ggplot(tgc, aes(x=dose, y=len, colour=supp)) + 
# #     geom_errorbar(aes(ymin=len-se, ymax=len+se), width=.1, position=pd) 
# 
# ddply(TerminatedDur.SimpleIntlStat,
#       .(Dur.Simple,
#         Intl,
#         metric),
#       dplyr::summarise,
#       Count=sum(Count),
#       Action.Obligation=sum(Action.Obligation))
# 
# TermLabels<-ddply(
#     subset(TerminatedDur.SimpleIntlStat,
#            !metric %in% c("Ceiling Weighted",
#                           "Ceiling Category Weighted")),
#     .(Dur.Simple,Intl,metric),
#     dplyr::summarise,
#     FacetCount=paste("Count:",prettyNum(sum(Count),big.mark=",")),
#     FacetValue=paste(FacetCount,"\nObligated: $",round(sum(Action.Obligation)/1000000000,1),"B",sep=""),
#     FacetY=max(mean+se))
# 
# TermLabels<-ddply(TermLabels,
#       .(Dur.Simple),
#       dplyr::mutate,
#       FacetY=max(FacetY))
# 
# ggplot(subset(TerminatedDur.SimpleIntlStat,!metric %in% c("Ceiling Weighted","Ceiling Category Weighted")),
#        aes(x=StartFY,y=mean,color=metric))+
#     geom_line()+
#         geom_point(aes(shape=metric))+
#     geom_text(data=TermLabels,
#               aes(x=2007,y=FacetY,label=FacetValue),
#               # parse=TRUE,
#               hjust=0,
#               vjust=1,
#               color="black")+
#     
#     facet_grid( Dur.Simple  ~ Intl, space = "free_y", scales="free_y") +
#     scale_x_continuous("Contract Starting Fiscal Year")+
#     scale_y_continuous("Percent Terminated",label=percent)+
#     geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1)+
#     theme(legend.position="bottom") #, position=pd
# 
# 
# 
# ggplot(subset(TerminatedDur.SimpleIntlStat,!metric %in% c("Ceiling Weighted","Ceiling Category Weighted")),
#        aes(x=StartFY,y=mean,color=metric))+
#     geom_line()+
#         geom_point(aes(shape=metric))+
#     geom_text(data=TermLabels,
#               aes(x=2007,y=FacetY,label=FacetValue),
#               # parse=TRUE,
#               hjust=0,
#               vjust=1,
#               color="black")+
#     facet_grid( Dur.Simple  ~ Intl, space = "free_y", scales="free_y") +
#     scale_x_continuous("Contract Starting Fiscal Year")+
#     scale_y_continuous("Percent Terminated",label=percent)+
#     geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1)+
#     theme(legend.position="bottom") #, position=pd
# 
# 
# TerminatedDurCeilStat<-ddply(subset(def_all,
#                                 !is.na(Dur) & StartFY>=2007 & 
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(Ceil,
#                            Dur,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          N    = length(TermNum),
#                          mean = mean(TermNum),
#                          sd   = sd(TermNum),
#                          se   = sd / sqrt(N),
#                          obl.mean = wtd.mean(TermNum,ObligationWT,na.rm=TRUE),
#                          ceil.mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue)
# )
# # 
# # pd <- position_dodge(0.1) # move them .05 to the left and right
# # 
# # ggplot(tgc, aes(x=dose, y=len, colour=supp)) + 
# #     geom_errorbar(aes(ymin=len-se, ymax=len+se), width=.1, position=pd) 
# 
# ggplot(TerminatedDurCeilStat,aes(x=StartFY))+
#     geom_line(aes(y=mean))+
#     # geom_line(aes(y=ceil.mean))+
#     geom_line(aes(y=obl.mean))+
#     geom_point(aes(y=mean))+
#     facet_grid(Ceil ~ Dur ) +
#     scale_x_continuous("Contract Starting Fiscal Year")+
#     scale_y_continuous("Percent Terminated",label=percent)+
#     geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1) #, position=pd
# 
# 
# ggplot(TerminatedDurCeilStat,
#        aes(x=StartFY,
#            y=obl.mean))+geom_line()+    geom_point()+facet_grid(Ceil ~ Dur ) +
#     scale_x_continuous("Contract Starting Fiscal Year")+
#     scale_y_continuous("Percent Terminated",label=percent)
#          # geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1) #, position=pd
# 
# 
# ggplot(TerminatedDurCeilStat,
#        aes(x=StartFY,
#            y=ceil.mean))+geom_line()+    geom_point()+facet_grid(Ceil ~ Dur ) +
#     scale_x_continuous("Contract Starting Fiscal Year")+
#     scale_y_continuous("Percent Terminated",label=percent)
#          # geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1) #, position=pd
# 
# 
# ```
# 
# 
# 
# 
# 
# ```{r FxCBcategories, fig.width=3,fig.height=9, dpi=600}
# 
# 
# 
# 
# TerminatedFxCb<-rbind(                         ddply(subset(def_all,
#                                 !is.na(FxCb) & StartFY>=2007 & 
#                                     StartFY<=2014 & (LastCurrentCompletionDate<=strptime("2015-09-30","%Y-%m-%d") | IsClosed==1) &
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(FxCb,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = mean(TermNum),
#                          sd   = sd(TermNum),
#                          se   = sd / sqrt(Count),
#                          metric="Unweighted"
#                           # ceil.mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          # ceil.cat.mean = wtd.mean(TermNum,ceil.median.wt)
# ))
# 
# 
# TerminatedFxCb<-rbind(TerminatedFxCb,
#                          ddply(subset(def_all,
#                                 !is.na(FxCb) & StartFY>=2007 & 
#                                     StartFY<=2014 & (LastCurrentCompletionDate<=strptime("2015-09-30","%Y-%m-%d") | IsClosed==1) &
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(FxCb,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = wtd.mean(TermNum,ObligationWT),
#                          sd   = sqrt(wtd.var(TermNum,ObligationWT)) ,
#                          se   = sd / sqrt(Count),
#                          metric="Obligation Weighted"
#                           # ceil.mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          # ceil.cat.mean = wtd.mean(TermNum,ceil.median.wt)
# ))
# 
# TerminatedFxCb<-rbind(TerminatedFxCb,
#                          ddply(subset(def_all,
#                                 !is.na(FxCb) & StartFY>=2007 & 
#                                     StartFY<=2014 & (LastCurrentCompletionDate<=strptime("2015-09-30","%Y-%m-%d") | IsClosed==1) &
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(FxCb,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          sd   = sqrt(wtd.var(TermNum,UnmodifiedContractBaseAndAllOptionsValue)) ,
#                          se   = sd / sqrt(Count),
#                          metric="Ceiling Weighted"
#                          # obl.mean = ,
#                          # ceil.mean = wtd.mean(TermNum,UnmodifiedContractBaseAndAllOptionsValue),
#                          # ceil.cat.mean = wtd.mean(TermNum,ceil.median.wt)
# 
# ))
# 
# 
# TerminatedFxCb<-rbind(TerminatedFxCb,
#                          ddply(subset(def_all,
#                                 !is.na(FxCb) & StartFY>=2007 & 
#                                     StartFY<=2014 & (LastCurrentCompletionDate<=strptime("2015-09-30","%Y-%m-%d") | IsClosed==1) &
#                                     !is.na(Ceil) &
#                                     UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")&
#                                     !is.na(Term)),
#                          .(FxCb,
#                            StartFY
#                          ),
#                          
#                          dplyr::summarise,
#                          Action.Obligation=sum(Action.Obligation),
#                          Count=length(CSIScontractID),
#                          mean = wtd.mean(TermNum,ceil.median.wt),
#                          sd   = sqrt(wtd.var(TermNum,ceil.median.wt)) ,
#                          se   = sd / sqrt(Count),
#                          metric="Ceiling Category Weighted"
# 
# ))
# 
# 
# # 
# # pd <- position_dodge(0.1) # move them .05 to the left and right
# # 
# # ggplot(tgc, aes(x=dose, y=len, colour=supp)) + 
# #     geom_errorbar(aes(ymin=len-se, ymax=len+se), width=.1, position=pd) 
# 
# ggplot(subset(TerminatedFxCb,!metric %in% c("Ceiling Weighted","Ceiling Category Weighted")),
#        aes(x=StartFY,y=mean,color=metric))+
#     geom_line()+
#     geom_point(aes(shape=metric))+
#     facet_grid( FxCb  ~.) +
#     scale_x_continuous("Contract Starting Fiscal Year")+
#     scale_y_continuous(label=percent)+
#     geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1)+
#     theme(legend.position="bottom") #, position=pd
# 
# ggplot(TerminatedFxCb,#subset(TerminatedFxCb,!metric %in% c("Ceiling Weighted","Ceiling Category Weighted")),
#        aes(x=StartFY,y=mean,color=metric))+
#     geom_line()+
#         geom_point(aes(shape=metric))+
#     facet_grid( FxCb  ~., space = "free_y", scales="free_y") +
#     scale_x_continuous("Contract Starting Fiscal Year")+
#     scale_y_continuous("Percent Terminated",label=percent)+
#     geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1)+
#     theme(legend.position="bottom") #, position=pd

Contracts are classified using a mix of numerical and categorical variables. While the changes in numerical variables are easy to grasp and summarize, a contract may have one line item that is competed and another that is not. As is detailed in the exploration on R&D, we are only considering information available prior to contract start. The percentage of contract obligations that were competed is a valuable benchmark, but is highly influenced by factors that occured after contract start..

Ceiling Breaches

In the same manner as contract terminations, change orders are reported in the reason for modification field. There are two values that this study counts as change orders: “Change Order” and “Definitize Change Order.” For the remainder of this report, contracts with at least one change order are called Changed Contracts.

There are also multiple modifications captured in FPDS that this current study will not investigate as change orders. These include:

In addition, there are a number of other modifications that may be undertaken based on changes on the government or vendor side that are not included in this analysis.

A histogram of the data showing the distribution of the number of change orders each year from 2007.

  NChgCeil<-ddply(def_all,
               .(SumOfisChangeOrder,
                 StartFY,
                 Ceil),
               plyr::summarise,
               ContractCount=length(CSIScontractID),
               Action.Obligation=sum(Action.Obligation))

NChgCeil<-ddply(NChgCeil, 
                .(Ceil), 
                transform, 
                pContractByCeil=ContractCount/sum(ContractCount),
                pObligationByCeil=Action.Obligation/sum(Action.Obligation))

NChgCeil$pTotalObligation<-NChgCeil$Action.Obligation/sum(NChgCeil$Action.Obligation,na.rm=TRUE)
NChgCeil$pTotalContract<-NChgCeil$ContractCount/sum(NChgCeil$ContractCount,na.rm=TRUE)
# 
# ggplot(
#   data = subset(NChgCeil,SumOfisChangeOrder>0),
#   aes_string(x = "SumOfisChangeOrder")
#   ) + geom_bar(binwidth=1) + 
#     facet_grid( Ceil ~ .,
#                 scales = "free_y",
#                 space = "free_y") + scale_y_continuous(expand = c(0,50)) +scale_x_continuous(limits=c(0,10))
# 
# 
# 
# ggplot(
#   data = subset(NChgCeil,SumOfisChangeOrder>0),
#   aes_string(x = "Ceil",weight="ContractCount"),
#   main="Number of Contracts with Change Orders\nBy Initial Contract Ceiling")+ 
#   geom_bar()+
#     scale_x_discrete("Initial Cost Ceiling (Current $ Value)")+scale_y_continuous("Number of Contracts with Change Orders")+theme(axis.text.x=element_text(angle=90))
# 
# 
# ggplot(
#   data = subset(NChgCeil,SumOfisChangeOrder>0),
#   aes_string(x = "Ceil",weight="pContractByCeil"),
#   main="Percentage of Contracts going to Contracts with Change Orders\nBy Initial Contract Ceiling")+ geom_bar()+ scale_y_continuous("Percent of Contracts with Change Orders", labels=percent)+
#     scale_x_discrete("Initial Cost Ceiling (Current $ Value)")+theme(axis.text.x=element_text(angle=90))
# 
# 
# ggplot(
#   data =subset(NChgCeil,SumOfisChangeOrder>0),
#   aes_string(x = "Ceil",weight="pObligationByCeil"),
#   main="Percentage of Contract Obligations going to Contracts with Change Orders\nBy Initial Contract Ceiling"
#   )+ geom_bar()+ scale_y_continuous("Percent of Obligations in Cost Ceiling Category", labels=percent)+
#     scale_x_discrete("Initial Cost Ceiling (Current $ Value)")+theme(axis.text.x=element_text(angle=90))
# 
# 
# ggplot(
#   data = subset(NChgCeil,SumOfisChangeOrder>0),
#   aes_string(x = "Ceil",weight="Action.Obligation")
#   )+ geom_bar()+
#     scale_x_discrete("Initial Cost Ceiling (Current $ Value)")+scale_y_continuous("Total Obligated Value of Contracts with Change Orders")+theme(axis.text.x=element_text(angle=90))
# 
# 
# 
# sum(subset(NChgCeil,SumOfisChangeOrder>0)$pTotalObligation)
# sum(subset(NChgCeil,SumOfisChangeOrder>0)$pTotalContract)

This study uses changes in the Base and All Options Value Amount as a way of tracking the potential cost of change orders.

The % Growth in Base and All Options Value Amount form Change Orders is calculated as follows:

Base and All Options Value Amount increases for all Change Order Modifications/ Base and All Options Value Amount from the original unmodified contract transaction

A histogram of the data showing the distribution of the initial amount of the specific change order

# 
# pChgCeil<-ddply(def_all,
#              .(pChange3Sig,
#                StartFY,
#                Ceil),
#              plyr::summarise,
#              ContractCount=length(CSIScontractID),
#              Action.Obligation=sum(Action.Obligation))
# 
# pChgCeil<-ddply(pChgCeil, 
#                 .(Ceil), 
#                 transform, 
#                 pContractByCeil=ContractCount/sum(ContractCount),
#                 pObligationByCeil=Action.Obligation/sum(Action.Obligation))
# 
# pChgCeil<-ddply(pChgCeil, 
#                 .(StartFY), 
#                 transform, 
#                 pContractByFYear=ContractCount/sum(ContractCount),
#                 pObligationByFYear=Action.Obligation/sum(Action.Obligation))
# 
# pChgCeil$pChange3Sig[pChgCeil$pChange3Sig==-Inf]<-NA
# pChgCeil$pChange3Sig[pChgCeil$pChange3Sig==Inf]<-NA
# 
# pChgCeilAverage<-ddply(pChgCeil,
#                 .(Ceil),
#                 plyr::summarise,
#                 mean = wtd.mean(pChange3Sig,ContractCount),
#                 sd   = sqrt(wtd.var(pChange3Sig,ContractCount))
#                 # se   = sd / sqrt(ContractCount)
#                 )
# 
# 
# 
# 
# pChgCeil$pTotalObligation<-pChgCeil$Action.Obligation/sum(NChgCeil$Action.Obligation,na.rm=TRUE)
# pChgCeil$pTotalContract<-pChgCeil$ContractCount/sum(NChgCeil$ContractCount,na.rm=TRUE)
# 
# pChgCeil$CRai <- cut2(
#     pChgCeil$pChange3Sig,c(
#                                               -0.001,
#                                               0.001,
#                                               0.15)
#     )
# 
# 
# 
# ggplot(
#   data = pChgCeil,
#   aes_string(x = "pChange3Sig",
#              weights = "ContractCount")
#   ) + geom_histogram(binwidth=0.01) +
#     facet_grid( Ceil ~ .,
#                 scales = "free_y",
#                 space = "free_y") +
#     scale_y_log10("Number of Contracts")+
#     scale_x_continuous("Percentage of Cost-Ceiling-Raising Change Orders b
#                        y\nInitial Cost Ceiling (Current $ Value)",
#                        limits=c(-1.25,1.25), labels=percent)+
#     theme(axis.text.x=element_text(angle=90,size=1))+
#   geom_vline(data=pChgCeilAverage,aes(xintercept=mean),color="red")
# 
# 
# 
# 
# # ggplot(
# #   data = subset(pChgCeil,is.numeric(pChange3Sig)&is.finite(pChange3Sig)),
# #   aes_string(y = "pChange3Sig")
# #   ) + geom_boxplot() 
# 
# ggplot(
#   data = subset(pChgCeil,is.finite(pChange3Sig)&
#                   !is.na(pChange3Sig)&StartFY>2007&StartFY<=2014&pChange3Sig!=0),
#   aes(y = pChange3Sig,x=factor(StartFY),
#              weight = ContractCount)
#   ) + geom_violin() + 
#     facet_grid( Ceil ~ .) +
#     # scale_y_log10("Number of Contracts",limits=c(-1.25,1.25))+
#      scale_y_continuous(
#        "Cost-Ceiling-Raising Change Orders Percent (Current $ Value)",
#                        limits=c(-0.05,0.05), labels=percent)
#     # theme(axis.text.x=element_text(angle=90,size=1))
# 
# 
# 
# ggplot(
#   data = subset(pChgCeil,is.finite(pChange3Sig)&
#                   !is.na(pChange3Sig)&StartFY>2007&StartFY<=2014),
#   aes(y = pChange3Sig,x=factor(StartFY),
#              weight = ContractCount)
#   ) + geom_boxplot(outlier.shape = NA,notch=TRUE) + 
#     facet_grid( Ceil ~ .) +
#     # scale_y_log10("Number of Contracts",limits=c(-1.25,1.25))+
#      scale_y_continuous(
#        "Cost-Ceiling-Raising Change Orders Percent (Current $ Value)",
#                        limits=c(-0.05,0.05), labels=percent)
#     # theme(axis.text.x=element_text(angle=90,size=1))
# 
# 
# # Percent of Contracts breakdown by StartYear
# ggplot(
#   data = subset(pChgCeil,
#                 StartFY>=2007 & 
#                   StartFY<=2015 &
#                   pChange3Sig!=0),
#   aes_string(x = "pChange3Sig",
#              weight="pContractByFYear")
#   ) + geom_histogram(binwidth=0.01) +
#   scale_x_continuous("Percentage of Cost-Ceiling-Raising Change Orders b
#                        y\nInitial Cost Ceiling (Current $ Value)",
#                        limits=c(-1.25,1.25), labels=percent)+
#   scale_y_continuous()+
#   facet_wrap("StartFY")
# 
# 
# # Percent of Contracts breakdown by Ceiling
# ggplot(
#   data = subset(pChgCeil,pChange3Sig!=0),
#   aes_string(x = "pChange3Sig",weight="pContractByCeil",fill="CRai")#
#   )+ geom_histogram(binwidth=0.05)+
# #     scale_x_continuous("Percentage of Cost-Ceiling-Raising Change Orders by\nInitial Cost Ceiling (Current $ Value)")
#     scale_y_continuous("Percent of Contracts", labels=percent)+
#         facet_grid( . ~ Ceil )+scale_x_continuous("Extent of Ceiling Breach in 5% Increments",limits=c(-0.5,1), labels=percent)+theme(axis.text.x=element_text(angle=90),legend.position="bottom")+scale_fill_discrete(name="Extent of Ceiling Breach")
# 
# 
# 
# tapply(pChgCeil$pChange3Sig, pChgCeil$Ceil, summary)
# 
# 
# 
# 
# #Percent of obligations breakdown
# ggplot(
#   data = subset(pChgCeil,pChange3Sig!=0),
#   aes_string(x = "pChange3Sig",weight="pTotalObligation",fill="CRai")#
#   )+ geom_bar(binwidth=0.01)+
# #     scale_x_continuous("Percentage of Obligations  by\nInitial Cost Ceiling (Current $ Value)")
#     scale_y_continuous("Percent of Completed Contracts\n(Weighted by Current $ Obligations)", labels=percent)+
#        # facet_grid( . ~ Term )+
#     scale_x_continuous("Extent of Ceiling Breach \n(Percent Change in Current $ Value in 1% Increments)",labels=percent,limits=c(-0.5,1))+
#     coord_cartesian(xlim=c(-0.5,1))+ theme(axis.text.x=element_text(angle=90),legend.position="bottom")+
#     scale_fill_discrete(name="Extent of Ceiling Breach")
# 
# 
# tapply(pChgCeil$CRai, pChgCeil$Ceil, summary)
# 
# 
# sum(subset(pChgCeil,pChange3Sig>0)$pTotalObligation)
# 
# BreachSummary<-ddply(def_all,
#                      .(Ceil,
#                        pChange3Sig,
#                        SumOfisChangeOrder,
#                        CRai,
#                        Term),
#                      summarise,
#                      pContractByCeil=sum(pContractByCeil),
#                      pObligationByCeil=sum(pObligationByCeil),
#                      pTotalObligation=sum(pTotalObligation))
# 
# 
# 
# ddply(pChgCeil,.(Term,CRai),
#                      summarise,
#                      pTotalObligation=sum(pTotalObligation))

Any Ceiling Breach

BreachedSDurSCeilStatCount<-
  only_complete(def_all) %>%
  group_by(Dur.Simple,
      Ceil.Big,
      StartFY,
      CBre
    ) %>%
    dplyr::summarise(
    Action.Obligation=sum(Action.Obligation),
    Count=length(CSIScontractID),
    metric="Contracts within Period"
)

BreachedSDurSCeilStatCount<-rbind(BreachedSDurSCeilStatCount,
                                    all_labeled(def_all) %>%                
                                    group_by(Dur.Simple,
                                      Ceil.Big,
                                      StartFY,
                                      CBre
                                    ) %>%
                                    dplyr::summarise(
                                    Action.Obligation=sum(Action.Obligation),
                                    Count=length(CSIScontractID),
                                    metric="Early Results for All Contracts"
))

BreachedSDurSCeilStatCount$metric<-factor(BreachedSDurSCeilStatCount$metric,
                                            levels=c("Contracts within Period",
                                                   "Early Results for All Contracts"),
                                            ordered=TRUE)

BreachedSDurSCeilLabels<-
    subset(BreachedSDurSCeilStatCount,metric=="Contracts within Period") %>%
    group_by(Dur.Simple,Ceil.Big) %>%
    dplyr::summarise(
    FacetCount=paste("Count:",prettyNum(sum(Count),big.mark=",")),
    FacetValue=paste(FacetCount,"\nObligated: $",round(sum(Action.Obligation)/1000000000,1),"B",sep="")
    )

Ypos<-max(BreachedSDurSCeilStatCount$Count)


ggplot(BreachedSDurSCeilStatCount,
       aes(x=StartFY,y=Count,color=CBre))+
    geom_line(aes(linetype=metric))+
    geom_point(aes(shape=CBre))+
    geom_text(data=BreachedSDurSCeilLabels,
              aes(x=2007,y=Ypos,label=FacetValue),
              # parse=TRUE,
              hjust=0,
              vjust=1,
              color="black")+
    facet_grid( Dur.Simple  ~ Ceil.Big ) +#
    scale_x_continuous("Contract Starting Fiscal Year")+
    scale_color_manual("Status", values=c("blue","red"))+
    scale_linetype_discrete("Early Results")+
    scale_shape_discrete("Status")+
    scale_y_log10("Number of Contracts (Logorithmic Scale)",label=scales::comma)+
    # geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1)+
    theme(legend.position="bottom") #, position=pd

summary(def_all$StartFY
        )
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2007    2009    2011    2011    2014    2015
ggplot(def_all,aes(x=l_Days))+geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 174983 rows containing non-finite values (stat_bin).

ggplot(subset(def_all,UnmodifiedDays<1),aes(x=UnmodifiedDays))+geom_histogram()

View(subset(def_all,Ceil.Big=="0k - <100k" & Dur.Simple=="(~2 years+]"))

Ceiling Breach Quantile

df.QCrai<-only_complete(def_all)%>%
      group_by(StartFY,
               Ceil,
               Dur)%>%
      dplyr::summarise(
      X50 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.5,na.rm=TRUE),
      X75 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.75,na.rm=TRUE), 
      X80 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.80,na.rm=TRUE), 
      X90 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.90,na.rm=TRUE), 
      X95 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.95,na.rm=TRUE),
      X99 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.99,na.rm=TRUE),
      ContractCount=length(CSIScontractID),
             Action.Obligation=sum(Action.Obligation),
      metric="Contracts within Period")


df.QCrai<-melt(df.QCrai,variable.name="Quantile",value.name="pCRai",measure.vars=c(
  "X50",
  "X75",
  "X80",
  "X90",
  "X95",
  "X99")
)

ggplot(df.QCrai,
       aes(x=StartFY,y=pCRai,color=Quantile))+
  geom_line()+
  scale_y_continuous(labels=percent)+
  facet_grid(Ceil~Dur)+labs(title="All Six Quantiles")

ggplot(subset(df.QCrai,
                !Quantile %in% c("X99")),
       aes(x=StartFY,y=pCRai,color=Quantile))+
  geom_line()+
  facet_grid(Ceil~Dur#,
             # scales="free_y",
             # space="free_y"
             )+
  scale_y_continuous(labels=percent)+
  labs(title="Five Quantiles (no 99%)")

#Test to see which percentiles register at all.
df.ecdf<-def_all %>%
      group_by(Ceil,
               Dur)%>%
      dplyr::summarise(
      r001 = ecdf(pChangeOrderUnmodifiedBaseAndAll)(0.001),
      r01 = ecdf(pChangeOrderUnmodifiedBaseAndAll)(0.01),
      r05 = ecdf(pChangeOrderUnmodifiedBaseAndAll)(0.01)
)

# df.ecdf<-subset(df.ecdf,StartFY>=2007&StartFY<=2014)

# test<-tapply(def_all, pChangeOrderUnmodifiedBaseAndAll, ecdf)

Simple.Dur / Ceiling.Big

df.QCrai.SDur<-only_complete(def_all) %>%
      group_by(StartFY,
               Ceil.Big,
               Dur.Simple) %>%
      dplyr::summarise(
      X50 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.5,na.rm=TRUE),
      X75 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.75,na.rm=TRUE), 
      X80 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.80,na.rm=TRUE), 
      X90 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.90,na.rm=TRUE), 
      X95 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.95,na.rm=TRUE),
      X99 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.99,na.rm=TRUE),
      ContractCount=length(CSIScontractID),
             Action.Obligation=sum(Action.Obligation),
      metric="Contracts within Period")



df.QCrai.SDur<-rbind(df.QCrai.SDur,
                all_labeled(def_all)%>%
      group_by(StartFY,
               Ceil.Big,
               Dur.Simple)%>%
      dplyr::summarise( 
      X50 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.5,na.rm=TRUE),
      X75 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.75,na.rm=TRUE), 
      X80 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.80,na.rm=TRUE), 
      X90 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.90,na.rm=TRUE), 
      X95 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.95,na.rm=TRUE),
      X99 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.99,na.rm=TRUE),
      ContractCount=length(CSIScontractID),
             Action.Obligation=sum(Action.Obligation),
      metric="Early Results for All Contracts")
)


df.QCrai.SDur<-melt(df.QCrai.SDur,
                      variable.name="Quantile",value.name="pCRai",measure.vars=c(
  "X50",
  "X75",
  "X80",
  "X90",
  "X95",
  "X99")
)

df.QCrai.SDur$Quantile<-factor(df.QCrai.SDur$Quantile,
  levels=c("X50",
  "X75",
  "X80",
  "X90",
  "X95",
  "X99"),
  labels=c("50th Percentile",
  "75th Percentile",
  "80th Percentile",
  "90th Percentile",
  "95th Percentile",
  "99th Percentile")
)

CRaiSDurCeilLabels<-ddply(
  subset(df.QCrai.SDur,Quantile=="50th Percentile" &
           metric=="Contracts within Period"),
    .(Dur.Simple,Ceil.Big),
    plyr::summarise,
    FacetCount=paste("Count:",prettyNum(sum(ContractCount),big.mark=",")),
    FacetValue=paste(FacetCount,"\nObligated: $",round(sum(Action.Obligation)/1000000000,1),"B",sep="")
    )

Ypos<-max(subset(df.QCrai.SDur,
                   !Quantile %in% c("99th Percentile")
                 )$pCRai,na.rm=TRUE)


CRaiOutput<-ggplot(subset(df.QCrai.SDur,
                !Quantile %in% c("99th Percentile",
                                 "75th Percentile")),
       aes(x=StartFY,y=pCRai,color=Quantile))+
  geom_line(aes(linetype=metric))+
  geom_point(aes(shape=Quantile))+
  geom_text(data=CRaiSDurCeilLabels,
              aes(x=2007,y=Ypos,label=FacetValue),
              # parse=TRUE,
              hjust=0,
              vjust=1,
              color="black")+
  facet_grid(Dur.Simple~Ceil.Big)+
               scale_y_continuous("Cost-Ceiling-Raising Change Orders Percent (Current $ Value)",
                                  labels=percent)+
  scale_x_continuous("Contract Starting Fiscal Year")+
  scale_linetype_discrete("Early Results")+
  theme(legend.position="bottom") #, position=pd

CRaiOutput

ggsave("CRaiOutput.png",
       CRaiOutput,
       width=8,
       height=7,
       dpi=600)

ggplot(subset(df.QCrai.SDur,
                # !Quantile %in% c("99th Percentile")
                !Ceil.Big %in% c("15k - <100k","0 - <15k")
              ),
       aes(x=StartFY,
           y=pCRai,
           color=Quantile))+
  geom_line(aes(linetype=metric))+
  facet_grid(Ceil.Big~Dur.Simple,
             scales="free_y",
             space="free_y")+
  scale_y_continuous(labels=percent)

#Test to see which percentiles register at all.
df.ecdf<-ddply(def_all,
      .(Ceil.Big,
               Dur.Simple),
      summarise, 
      r001 = ecdf(pChangeOrderUnmodifiedBaseAndAll)(0.001),
      r01 = ecdf(pChangeOrderUnmodifiedBaseAndAll)(0.01),
      r05 = ecdf(pChangeOrderUnmodifiedBaseAndAll)(0.01)
)

# df.ecdf<-subset(df.ecdf,StartFY>=2007&StartFY<=2014)


CRaiSDurCeilFYearSummary<-ddply(
  subset(df.QCrai.SDur,Quantile=="50th Percentile" &
           metric=="Contracts within Period"),
    .(Dur.Simple,Ceil.Big,StartFY),
    plyr::summarise,
    FacetCount=paste("Count:",prettyNum(sum(ContractCount),big.mark=",")),
    FacetValue=paste(FacetCount,"\nObligated: $",round(sum(Action.Obligation)/1000000000,1),"B",sep="")
    )

DurBoundary<-subset(def_all,Ceil=="75m+"&
         Dur=="(~2 years+]"&
         StartFY==2013&
         UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")
         )

Dur.Simple / Ceiling.Simple

df.QCrai.SDur<-only_complete(def_all) %>%
      group_by(StartFY,
               Ceil.Simple,
               Dur.Simple) %>%
      dplyr::summarise(
      X50 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.5,na.rm=TRUE),
      X75 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.75,na.rm=TRUE), 
      X80 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.80,na.rm=TRUE), 
      X90 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.90,na.rm=TRUE), 
      X95 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.95,na.rm=TRUE),
      X99 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.99,na.rm=TRUE),
      ContractCount=length(CSIScontractID),
             Action.Obligation=sum(Action.Obligation),
      metric="Contracts within Period")



df.QCrai.SDur<-rbind(df.QCrai.SDur,
                all_labeled(def_all)%>%
      group_by(StartFY,
               Ceil.Simple,
               Dur.Simple)%>%
      dplyr::summarise( 
      X50 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.5,na.rm=TRUE),
      X75 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.75,na.rm=TRUE), 
      X80 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.80,na.rm=TRUE), 
      X90 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.90,na.rm=TRUE), 
      X95 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.95,na.rm=TRUE),
      X99 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.99,na.rm=TRUE),
      ContractCount=length(CSIScontractID),
             Action.Obligation=sum(Action.Obligation),
      metric="Early Results for All Contracts")
)


df.QCrai.SDur<-melt(df.QCrai.SDur,
                      variable.name="Quantile",value.name="pCRai",measure.vars=c(
  "X50",
  "X75",
  "X80",
  "X90",
  "X95",
  "X99")
)

df.QCrai.SDur$Quantile<-factor(df.QCrai.SDur$Quantile,
  levels=c("X50",
  "X75",
  "X80",
  "X90",
  "X95",
  "X99"),
  labels=c("50th Percentile",
  "75th Percentile",
  "80th Percentile",
  "90th Percentile",
  "95th Percentile",
  "99th Percentile")
)

CRaiSDurCeilLabels<-ddply(
  subset(df.QCrai.SDur,Quantile=="50th Percentile" &
           metric=="Contracts within Period"),
    .(Dur.Simple,Ceil.Simple),
    plyr::summarise,
    FacetCount=paste("Count:",prettyNum(sum(ContractCount),big.mark=",")),
    FacetValue=paste(FacetCount,"\nObligated: $",round(sum(Action.Obligation)/1000000000,1),"B",sep="")
    )

Ypos<-max(subset(df.QCrai.SDur,
                   !Quantile %in% c("99th Percentile")
                 )$pCRai,na.rm=TRUE)


CRaiOutput<-ggplot(subset(df.QCrai.SDur,
                !Quantile %in% c("99th Percentile",
                                 "75th Percentile")),
       aes(x=StartFY,y=pCRai,color=Quantile))+
  geom_line(aes(linetype=metric))+
  geom_point(aes(shape=Quantile))+
  geom_text(data=CRaiSDurCeilLabels,
              aes(x=2007,y=Ypos,label=FacetValue),
              # parse=TRUE,
              hjust=0,
              vjust=1,
              color="black")+
  facet_grid(Dur.Simple~Ceil.Simple)+
               scale_y_continuous("Cost-Ceiling-Raising Change Orders Percent (Current $ Value)",
                                  labels=percent)+
  scale_x_continuous("Contract Starting Fiscal Year")+
  scale_linetype_discrete("Early Results")+
  theme(legend.position="bottom") #, position=pd

CRaiOutput

ggsave("CRaiOutput.png",
       CRaiOutput,
       width=8,
       height=7,
       dpi=600)

ggplot(subset(df.QCrai.SDur,
                # !Quantile %in% c("99th Percentile")
                !Ceil.Simple %in% c("15k - <100k","0 - <15k")
              ),
       aes(x=StartFY,
           y=pCRai,
           color=Quantile))+
  geom_line(aes(linetype=metric))+
  facet_grid(Ceil.Simple~Dur.Simple,
             scales="free_y",
             space="free_y")+
  scale_y_continuous(labels=percent)

#Test to see which percentiles register at all.
df.ecdf<-ddply(def_all,
      .(Ceil.Simple,
               Dur.Simple),
      summarise, 
      r001 = ecdf(pChangeOrderUnmodifiedBaseAndAll)(0.001),
      r01 = ecdf(pChangeOrderUnmodifiedBaseAndAll)(0.01),
      r05 = ecdf(pChangeOrderUnmodifiedBaseAndAll)(0.01)
)

# df.ecdf<-subset(df.ecdf,StartFY>=2007&StartFY<=2014)


CRaiSDurCeilFYearSummary<-ddply(
  subset(df.QCrai.SDur,Quantile=="50th Percentile" &
           metric=="Contracts within Period"),
    .(Dur.Simple,Ceil.Simple,StartFY),
    plyr::summarise,
    FacetCount=paste("Count:",prettyNum(sum(ContractCount),big.mark=",")),
    FacetValue=paste(FacetCount,"\nObligated: $",round(sum(Action.Obligation)/1000000000,1),"B",sep="")
    )

DurBoundary<-subset(def_all,Ceil=="75m+"&
         Dur=="(~2 years+]"&
         StartFY==2013&
         UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")
         )
# df.QNWork.SDur<-ddply(subset(def_all,
#                                         !is.na(Dur.Simple) & 
#                !is.na(Ceil.Big) &
#                !is.na(pNewWorkUnmodifiedBaseAndAll) &
#                StartFY>=2007 & 
#                StartFY<=2014 &                
#                (LastCurrentCompletionDate<=as.Date("2015-09-30") |
#                     IsClosed==1) &
#                UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")),
#       .(StartFY,
#                Ceil.Big,
#                Dur.Simple),
#       summarise, 
#       X50 = quantile(pNewWorkUnmodifiedBaseAndAll, probs = 0.5,na.rm=TRUE),
#       X75 = quantile(pNewWorkUnmodifiedBaseAndAll, probs = 0.75,na.rm=TRUE), 
#       X80 = quantile(pNewWorkUnmodifiedBaseAndAll, probs = 0.80,na.rm=TRUE), 
#       X90 = quantile(pNewWorkUnmodifiedBaseAndAll, probs = 0.90,na.rm=TRUE), 
#       X95 = quantile(pNewWorkUnmodifiedBaseAndAll, probs = 0.95,na.rm=TRUE),
#       X99 = quantile(pNewWorkUnmodifiedBaseAndAll, probs = 0.99,na.rm=TRUE),
#       ContractCount=length(CSIScontractID),
#              Action.Obligation=sum(Action.Obligation),
#       metric="Contracts within Period")
# 
# 
# 
# df.QNWork.SDur<-rbind(df.QNWork.SDur,
#                 ddply(subset(def_all,
#                                                !is.na(Dur.Simple) & 
#                                                !is.na(Ceil.Big) &
#                                                !is.na(pNewWorkUnmodifiedBaseAndAll) &
#                                                StartFY>=2007 & 
#                                                StartFY<=2014),
#       .(StartFY,
#                Ceil.Big,
#                Dur.Simple),
#       summarise, 
#       X50 = quantile(pNewWorkUnmodifiedBaseAndAll, probs = 0.5,na.rm=TRUE),
#       X75 = quantile(pNewWorkUnmodifiedBaseAndAll, probs = 0.75,na.rm=TRUE), 
#       X80 = quantile(pNewWorkUnmodifiedBaseAndAll, probs = 0.80,na.rm=TRUE), 
#       X90 = quantile(pNewWorkUnmodifiedBaseAndAll, probs = 0.90,na.rm=TRUE), 
#       X95 = quantile(pNewWorkUnmodifiedBaseAndAll, probs = 0.95,na.rm=TRUE),
#       X99 = quantile(pNewWorkUnmodifiedBaseAndAll, probs = 0.99,na.rm=TRUE),
#       ContractCount=length(CSIScontractID),
#              Action.Obligation=sum(Action.Obligation),
#       metric="Early Results for All Contracts")
# )
# 
# 
# df.QNWork.SDur<-melt(df.QNWork.SDur,
#                       variable.name="Quantile",value.name="pNWork",measure.vars=c(
#   "X50",
#   "X75",
#   "X80",
#   "X90",
#   "X95",
#   "X99")
# )
# 
# df.QNWork.SDur$Quantile<-factor(df.QNWork.SDur$Quantile,
#   levels=c("X50",
#   "X75",
#   "X80",
#   "X90",
#   "X95",
#   "X99"),
#   labels=c("50th Percentile",
#   "75th Percentile",
#   "80th Percentile",
#   "90th Percentile",
#   "95th Percentile",
#   "99th Percentile")
# )
# 
# NWorkSDurCeilLabels<-ddply(
#   subset(df.QNWork.SDur,Quantile=="50th Percentile" &
#            metric=="Contracts within Period"),
#     .(Dur.Simple,Ceil.Big),
#     plyr::summarise,
#     FacetCount=paste("Count:",prettyNum(sum(ContractCount),big.mark=",")),
#     FacetValue=paste(FacetCount,"\nObligated: $",round(sum(Action.Obligation)/1000000000,1),"B",sep="")
#     )
# 
# Ypos<-max(subset(df.QNWork.SDur,
#                    !Quantile %in% c("99th Percentile")
#                  )$pNWork,na.rm=TRUE)
# 
# 
# NWorkOutput<-ggplot(subset(df.QNWork.SDur,
#                 !Quantile %in% c("99th Percentile",
#                                  "75th Percentile")),
#        aes(x=StartFY,y=pNWork,color=Quantile))+
#   geom_line(aes(linetype=metric))+
#   geom_point(aes(shape=Quantile))+
#   geom_text(data=NWorkSDurCeilLabels,
#               aes(x=2007,y=Ypos,label=FacetValue),
#               # parse=TRUE,
#               hjust=0,
#               vjust=1,
#               color="black")+
#   facet_grid(Dur.Simple~Ceil.Big)+
#                scale_y_continuous("New Work Orders Percent (Current $ Value)",
#                                   labels=percent)+
#   scale_x_discrete("Contract Starting Fiscal Year")+
#   scale_linetype_discrete("Early Results")+
#   theme(legend.position="bottom") #, position=pd
# 
# NWorkOutput
# 
# 
# ggplot(subset(df.QNWork.SDur,
#                 # !Quantile %in% c("99th Percentile")
#                 !Ceil.Big %in% c("15k - <100k","0 - <15k")
#               ),
#        aes(x=StartFY,
#            y=pNWork,
#            color=Quantile))+
#   geom_line(aes(linetype=metric))+
#   facet_grid(Ceil.Big~Dur.Simple,
#              scales="free_y",
#              space="free_y")+
#   scale_y_continuous(labels=percent)
# 
# #Test to see which percentiles register at all.
# df.ecdf<-ddply(def_all,
#       .(Ceil.Big,
#                Dur.Simple),
#       summarise, 
#       r001 = ecdf(pNewWorkUnmodifiedBaseAndAll)(0.001),
#       r01 = ecdf(pNewWorkUnmodifiedBaseAndAll)(0.01),
#       r05 = ecdf(pNewWorkUnmodifiedBaseAndAll)(0.01)
# )
# 
# # df.ecdf<-subset(df.ecdf,StartFY>=2007&StartFY<=2014)
# 
# 
# NWorkSDurCeilFYearSummary<-ddply(
#   subset(df.QNWork.SDur,Quantile=="50th Percentile" &
#            metric=="Contracts within Period"),
#     .(Dur.Simple,Ceil.Big,StartFY),
#     plyr::summarise,
#     FacetCount=paste("Count:",prettyNum(sum(ContractCount),big.mark=",")),
#     FacetValue=paste(FacetCount,"\nObligated: $",round(sum(Action.Obligation)/1000000000,1),"B",sep="")
#     )
# 
# DurBoundary<-subset(def_all,Ceil=="75m+"&
#          Dur=="(~2 years+]"&
#          StartFY==2013&
#          UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")
#          )

Dur.Simple / Ceiling.1m

df.QCrai.SDur<-only_complete(def_all) %>%
      group_by(StartFY,
               Ceil.1m,
               Dur.Simple) %>%
      dplyr::summarise(
      X50 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.5,na.rm=TRUE),
      X75 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.75,na.rm=TRUE), 
      X80 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.80,na.rm=TRUE), 
      X90 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.90,na.rm=TRUE), 
      X95 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.95,na.rm=TRUE),
      X99 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.99,na.rm=TRUE),
      ContractCount=length(CSIScontractID),
             Action.Obligation=sum(Action.Obligation),
      metric="Contracts within Period")



df.QCrai.SDur<-rbind(df.QCrai.SDur,
                all_labeled(def_all)%>%
      group_by(StartFY,
               Ceil.1m,
               Dur.Simple)%>%
      dplyr::summarise( 
      X50 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.5,na.rm=TRUE),
      X75 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.75,na.rm=TRUE), 
      X80 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.80,na.rm=TRUE), 
      X90 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.90,na.rm=TRUE), 
      X95 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.95,na.rm=TRUE),
      X99 = quantile(pChangeOrderUnmodifiedBaseAndAll, probs = 0.99,na.rm=TRUE),
      ContractCount=length(CSIScontractID),
             Action.Obligation=sum(Action.Obligation),
      metric="Early Results for All Contracts")
)


df.QCrai.SDur<-melt(df.QCrai.SDur,
                      variable.name="Quantile",value.name="pCRai",measure.vars=c(
  "X50",
  "X75",
  "X80",
  "X90",
  "X95",
  "X99")
)

df.QCrai.SDur$Quantile<-factor(df.QCrai.SDur$Quantile,
  levels=c("X50",
  "X75",
  "X80",
  "X90",
  "X95",
  "X99"),
  labels=c("50th Percentile",
  "75th Percentile",
  "80th Percentile",
  "90th Percentile",
  "95th Percentile",
  "99th Percentile")
)

CRaiSDurCeilLabels<-ddply(
  subset(df.QCrai.SDur,Quantile=="50th Percentile" &
           metric=="Contracts within Period"),
    .(Dur.Simple,Ceil.1m),
    plyr::summarise,
    FacetCount=paste("Count:",prettyNum(sum(ContractCount),big.mark=",")),
    FacetValue=paste(FacetCount,"\nObligated: $",round(sum(Action.Obligation)/1000000000,1),"B",sep="")
    )

Ypos<-max(subset(df.QCrai.SDur,
                   !Quantile %in% c("99th Percentile")
                 )$pCRai,na.rm=TRUE)


CRaiOutput<-ggplot(subset(df.QCrai.SDur,
                !Quantile %in% c("99th Percentile",
                                 "75th Percentile")),
       aes(x=StartFY,y=pCRai,color=Quantile))+
  geom_line(aes(linetype=metric))+
  geom_point(aes(shape=Quantile))+
  geom_text(data=CRaiSDurCeilLabels,
              aes(x=2007,y=Ypos,label=FacetValue),
              # parse=TRUE,
              hjust=0,
              vjust=1,
              color="black")+
  facet_grid(Dur.Simple~Ceil.1m)+
               scale_y_continuous("Cost-Ceiling-Raising Change Orders Percent (Current $ Value)",
                                  labels=percent)+
  scale_x_continuous("Contract Starting Fiscal Year")+
  scale_linetype_discrete("Early Results")+
  theme(legend.position="bottom") #, position=pd

CRaiOutput

ggsave("CRaiOutput.png",
       CRaiOutput,
       width=8,
       height=7,
       dpi=600)

ggplot(subset(df.QCrai.SDur,
                # !Quantile %in% c("99th Percentile")
                !Ceil.1m %in% c("15k - <100k","0 - <15k")
              ),
       aes(x=StartFY,
           y=pCRai,
           color=Quantile))+
  geom_line(aes(linetype=metric))+
  facet_grid(Ceil.1m~Dur.Simple,
             scales="free_y",
             space="free_y")+
  scale_y_continuous(labels=percent)

#Test to see which percentiles register at all.
df.ecdf<-ddply(def_all,
      .(Ceil.1m,
               Dur.Simple),
      summarise, 
      r001 = ecdf(pChangeOrderUnmodifiedBaseAndAll)(0.001),
      r01 = ecdf(pChangeOrderUnmodifiedBaseAndAll)(0.01),
      r05 = ecdf(pChangeOrderUnmodifiedBaseAndAll)(0.01)
)

# df.ecdf<-subset(df.ecdf,StartFY>=2007&StartFY<=2014)


CRaiSDurCeilFYearSummary<-ddply(
  subset(df.QCrai.SDur,Quantile=="50th Percentile" &
           metric=="Contracts within Period"),
    .(Dur.Simple,Ceil.1m,StartFY),
    plyr::summarise,
    FacetCount=paste("Count:",prettyNum(sum(ContractCount),big.mark=",")),
    FacetValue=paste(FacetCount,"\nObligated: $",round(sum(Action.Obligation)/1000000000,1),"B",sep="")
    )

DurBoundary<-subset(def_all,Ceil=="75m+"&
         Dur=="(~2 years+]"&
         StartFY==2013&
         UnmodifiedCurrentCompletionDate<as.Date("2015-09-30")
         )

# View(subset(def_all,Ceil.Big=="75m+" & Dur.Simple=="(~2 years+]" & StartFY==2014))

# write.csv(subset(def_all,Ceil.Big=="75m+" & Dur.Simple=="(~2 years+]" & StartFY==2014),"Long2014.csv")